#' reading in:
#'   manifesto_party_year.csv  (written out by 2-manifesto-hawkdove.R)
#'   WhoGov_within_V2.0.csv  (original data from WhoGov)
#' cleaning:
#'   fixing a bunch of individual cases with 0 or >1 leader, m_defense, and/or m_foreignaffairs
#' writing out (in process_data folder):
#'   whogov_clean.csv 


# load inputs and restrict the WhoGov panel to the usable sample

# party-year manifesto data and the raw WhoGov data
m = read_csv('process_data/manifesto_party_year.csv')
w = read_csv("source_data/WhoGov_within_V2.0.csv")

# keep only WhoGov countries that also occur in m
m_iso = sort(unique(pull(m, country_isocode)))
table(w$country_isocode %in%  m_iso) # 80485 T, 169472 F
w = filter(w, country_isocode %in% m_iso)

# remove obs for which name, party and party_english are all missing
table(is.na(w$name) & is.na(w$party) & is.na(w$party_english)) # 628
w = filter(w, !(is.na(name) & is.na(party) & is.na(party_english)))

# remove Costa Rica and Panama (no militaries)
w = filter(w, !(country_isocode %in% c('CRI','PAN')))


# tally leaders, defense ministers and foreign ministers per country-year;
# a clean country-year has a count of exactly 1 for each of the three roles
wcy_check = ungroup(
  summarise(
    group_by(w, country_isocode, year),
    n_leader = sum(leader),
    n_mindef = sum(m_defense),
    n_mfa = sum(m_foreignaffairs)
  )
)

# distribution of each count across country-years
table(wcy_check$n_leader) # 2 x 0, 3010 x 1
table(wcy_check$n_mindef) # 133 x 0, 12 x 2, 2 x 3, 2865 x 1
table(wcy_check$n_mfa) # 34 x 0, 18 x 2, 2960 x 1


#### CLEANING UP LEADERS #####


# country-years with no leader coded
filter(wcy_check, n_leader==0) # ARG 1973, TUR 1996

# ARG 1973: all info missing for this country-year, so remove it
select(filter(w, country_isocode=='ARG' & year==1973), position, name)
w = filter(w, country_isocode!='ARG' | year!=1973)

# TUR 1996: PM from June 1996 - June 1997 was Necmettin Erbakan;
#  seems that he didn't have a party at the time, so leaving as is
select(filter(w, country_isocode=='TUR' & year==1996), position, name)


#### CLEANING UP M_DEFENSE ####


#  clearly some obs that are minister of defense, but miscoded as m_defense = 0
#  to start, look for country-years that have no m_defense==1 at all but do
#  have an obs whose portfolio_1 contains "Defense" (anywhere in the string)

# country_year is a "<country_name> - <year>" label; the column stays on w
w = w |> mutate(country_year = paste(country_name, year, sep=' - '))
wcy_vec = unique(w$country_year)

# iterate through all country-years; flagged ones are printed on one line
for(z in wcy_vec){
  zsub = w |> filter(country_year==z)
  # for this country-year, any obs with m_defense==1?
  #  (na.rm so that a missing flag cannot turn the if() condition into NA)
  any_mindef = sum(zsub$m_defense, na.rm=TRUE) > 0
  if(any_mindef){next}
  # if not, then: any obs with "Defense" in portfolio_1?
  #  test the column vector itself: grepl() on a data frame only works because
  #  the whole column gets deparsed into a single string first
  any_defport = any(grepl('Defense', zsub$portfolio_1))
  # if so, print the country-year
  if(any_defport){
    cat(z, ' ')
  }
}
# Azerbaijan (Azerbaijani Republic) - 1993  
#'   only missing in 1993; 'Min. Of National Security' has "defense" in portfolio, 
#'    but it exists in other years alongside 'Min. Of Defense'
#' France - 2017  France - 2018  - fixing below
#' Spain - 1977  Spain - 1978   - fixing below
#' Moldova - 2017  - vacant for most of 2017


# fixing France and Spain: flag the positions below as m_defense = 1
w = w |> 
  mutate(
    m_defense = case_when(
      # FRA: applies to every year in which this position title appears
      country_isocode=='FRA' & position=='Min. Of The Armed Forces' ~ 1,
      # ESP: only for 1977 and 1978
      country_isocode=='ESP' & year %in% c(1977,1978) &
        str_detect(position, '1St Dep. Prime Min. For Def') ~ 1,
      # all other obs keep their value; TRUE is spelled out because the
      #  shorthand T is an ordinary variable that can be reassigned
      TRUE ~ m_defense
    )
  )


# other cases: country-years whose m_defense count is still not exactly 1,
#  collapsed to one row per country (number of such years, first and last year)
summarise(group_by(w, country_isocode, year), n_mindef = sum(m_defense)) |>
  filter(!(n_mindef==1)) |>
  group_by(country_isocode) |>
  summarise(
    n_yr = n(),
    first_yr = min(year),
    last_yr = max(year)
  )
#   country_isocode  n_yr first_yr last_yr
#   <chr>           <int>    <dbl>   <dbl>
# 1 AZE                 1     1993    1993
# 2 BIH                 9     1996    2005
# 3 CAN                 2     2006    2007
# 4 COL                 1     2001    2001
# 5 DOM                 1     1966    1966
# 6 HRV                 1     2020    2020
# 7 ISL                56     1966    2021
# 8 JPN                 1     1982    1982
# 9 MDA                 2     1992    2017
# 10 MKD                 1     2020    2020
# 11 MLT                46     1966    2012
# 12 MNE                 9     1997    2005
# 13 ROU                 1     2004    2004
# 14 SRB                 9     1997    2005
# 15 SWE                 2     2019    2020


# Iceland: 
#  for 1971 and 1974 only the president is listed (no info on other ministers),
#  so the obs for those two years are removed
w = filter(w, country_isocode!='ISL' | !(year %in% c(1971, 1974)))
# for the other years: "The Minister for Foreign Affairs is the central authority for defence"
#  https://www.government.is/topics/foreign-affairs/national-security/

# Malta: no office with defense in portfolio prior to 2013

# Colombia, two m_defense in 2001: 
#  Luis Fernando Ramirez Acuna and Gustavo Bell Lemus both listed as m_defense in 2001
#  transition happened May-June, so Acuna was already out
w |> filter(country_isocode=='COL' & year%in% c(1999:2003) & m_defense==1) |> 
  select(year,position, name, portfolio_1, m_defense, m_foreignaffairs) |> print(n=Inf)
# dropping Acuna 2001
#  the test is scoped to COL and matches name with %in%, which gives FALSE
#  (not NA) for a missing name: filter() discards rows whose condition is NA,
#  so a bare `name == ...` test would also remove every 2001 obs with no name
w = w |> filter(
  ! (country_isocode=='COL' & name %in% 'Luis Fernando Ramirez Acuna' & year == 2001)
)

# SWE, 2019:2020
#  in each of 2019 and 2020, Min. For Rural Affairs is m_defense=1, clearly typo
w |>
  filter(country_isocode=='SWE', year %in% 2018:2022, m_defense==1) |>
  select(year, position, name, portfolio_1, m_defense, m_foreignaffairs) |>
  print(n=Inf)

# MKD, 2020
#  in 2020, Min. of Political System and Inter-Ethnic Relations is m_defense=1, clearly typo
w |>
  filter(country_isocode=='MKD', year %in% 2018:2022, m_defense==1) |>
  select(year, position, name, portfolio_1, m_defense, m_foreignaffairs) |>
  print(n=Inf)


# fixing various one-off problems
#  rules are tried top to bottom; obs matching none of them keep their m_defense

w = w |> mutate(
  m_defense = case_when(
    
    # Romania 2004: two m_defense
    #  one is Ioan Mircea Pascu, "Min. of Defense"
    #  other is Ioan Talpes, "Dep. Prime Min For ...", both same party
    country_name=='Romania' & year==2004 & name=='Ioan Talpes' ~ 0,
    
    # BIH has multiple m_defenses up through 2005
    #  one for Federation, one for Republika Srpska, 
    #  and then one national Minister of Defense starting 2004
    # Republika Srpska is never counted; 'Min. Of Defense (Federation)' is
    #  counted through 2003 and set to 0 from 2004 on
    country_isocode=='BIH' & position=='Min. Of Defense (Republika Srpska)' ~ 0,
    country_isocode=='BIH' & position=='Min. Of Defense (Federation)' & year > 2003 ~ 0,
    
    # Canada, 2006-2007: 
    #  Gordon O'Connor was Minister of National Defense feb 2006 to aug 2007,
    #   then moved to Minister of National Revenue aug 2007
    #  is miscoded as Minister of National Revenue for 2006-2007
    #  (with no one coded as m_defense=1)
    country_name=='Canada' & name=="Gordon O'Connor" & year %in% c(2006,2007) ~ 1,
    
    # all cases of Iceland: the foreign minister is treated as m_defense
    country_isocode=='ISL' & str_detect(portfolio_1, 'Foreign Relations') & 
      position != 'Min. Of Environment & Nordic Cooperation' ~ 1,

    # these two were miscoded as m_defense=1
    country_isocode=='SWE' & position== 'Min. For Rural Affairs' ~ 0,
    country_isocode=='MKD' & position== 'Min. of Political System and Inter-Ethnic Relations' ~ 0,
    
    # TRUE rather than T: the shorthand is an ordinary variable that can be reassigned
    TRUE ~ m_defense
  )
)


# Japan 1982: filling in manually
#  columns not listed here are left NA by add_row()
w = w |> add_row(
  year= 1982,
  country_isocode='JPN',
  country_name= 'Japan',
  # same "<country_name> - <year>" label that the other obs got when
  #  country_year was built; otherwise this row would carry NA there
  country_year='Japan - 1982',
  position='Dir. Gen., Japan Defense Agency',
  name='Soichiro Ito',
  title='',
  gender='Male',
  birthyear=1924,
  deadyear='2001',
  party='ldp',
  whogov_partyid = 'JPN-ldp',
  party_english ='Liberal Democratic Party',
  leader=0,
  m_defense=1,
  m_foreignaffairs=0
)


#### CLEANING UP M_FOREIGNAFFAIRS ####

# country-years with more than one m_foreignaffairs==1,
#  collapsed to one row per country (number of such years, first and last year)
summarise(group_by(w, country_isocode, year), n_mfa = sum(m_foreignaffairs)) |>
  filter(n_mfa>1) |>
  group_by(country_isocode) |>
  summarise(
    n = n(),
    first_yr = min(year),
    last_yr = max(year)
  )

# country_isocode     n first_yr last_yr
# <chr>           <int>    <dbl>   <dbl>
# 1 BIH                 2     1996    1997
# 2 EST                 3     1992    2015
# 3 ISR                 1     2000    2000
# 4 MDA                 1     1993    1993
# 5 NOR                 9     1990    2001
# 6 PRT                 1     1975    1975
# 7 SRB                 1     2013    2013

# BIH: dropping MFA for Republika Srpska
w |>
  filter(country_isocode=='BIH', year %in% c(1996, 1997), m_foreignaffairs==1) |>
  select(year, position, name, portfolio_1, party, party_english)

# for each of the other countries, list every obs coded m_foreignaffairs==1:
#  EST: 3 years where minister with portfolio_1 = 'Foreign Economic Relations' is m_foreignaffairs
#  ISR: "Dep. Min. Of Foreign Affairs" in 2000
#  MDA: 'Min. Of Foreign Economic Relations' coded m_foreignaffairs=1 in 1993
#  NOR: 9 obs where minister with portfolio_1 = "Foreign Economic Relations"
#  PRT: "Min. Of Foreign Trade" in 1975
#  SRB: minister with "Foreign Economic Relations" portfolio_1 in 2013
for(mfa_iso in c('EST', 'ISR', 'MDA', 'NOR', 'PRT', 'SRB')){
  w |>
    filter(country_isocode==mfa_iso & m_foreignaffairs==1) |>
    select(year, position, name, portfolio_1) |>
    print(n=Inf)
}


# fixing these: un-flag the extra m_foreignaffairs obs found above
w = w |> 
  mutate(
    m_foreignaffairs = case_when(
      # flagged because of the position title
      country_isocode=='BIH' & position=='Min. Of Foreign Affairs (Republika Srpska)' ~ 0,
      country_isocode=='ISR' & position == 'Dep. Min. Of Foreign Affairs' ~ 0,
      country_isocode=='PRT' & position=='Min. Of Foreign Trade' ~ 0,
      # flagged because of the 'Foreign Economic Relations' portfolio
      country_isocode %in% c('EST','MDA','NOR','SRB') & 
        portfolio_1=='Foreign Economic Relations' ~ 0,
      # all other obs keep their value; TRUE is spelled out because the
      #  shorthand T is an ordinary variable that can be reassigned
      TRUE ~ m_foreignaffairs
    )
  )


# country-years with no m_foreignaffairs==1,
#  collapsed to one row per country (first year, last year, number of years)

w |>
  group_by(country_isocode, year) |>
  summarise(n_mfa = sum(m_foreignaffairs)) |>
  ungroup() |>
  filter(n_mfa==0) |>
  group_by(country_isocode) |>
  summarise(
    min_yr = min(year),
    max_yr = max(year),
    n_yr = n()
  )

# country_isocode min_yr max_yr  n_yr
# <chr>            <dbl>  <dbl> <int>
# 1 CAN               1984   1984     1
# 2 CHE               1966   1979    14
# 3 ISR               2013   2013     1
# 4 MDA               1992   1992     1
# 5 MLT               2008   2008     1
# 6 MNE               1997   2016     2
# 7 SRB               1997   2005     9
# 8 URY               1968   1968     1
# 9 ZAF               1970   1970     1


# Switzerland: obs from 1966-1979 whose portfolio_1 mentions 'Foreign'
w |> filter(country_isocode=='CHE' & year %in% 1966:1979) |>
  filter(str_detect(portfolio_1, 'Foreign')) |>
  select(year, position, name, portfolio_1) |> print(n=Inf)

#' couple issues with CHE:
#'  from 1966--1979, "Min. Political Dept" or "Min. , Political Dept." is the foreign minister
#'  in 1980, "Willi Ritschard" is listed as "Min. , Dept. Of Foreign Affairs"
#'    and "Pierre Aubert" is listed as "Min. , Dept. Of Finance & Customs"
#'    (but Aubert is MFA in years before and after)

# fixing:
#  every rule is limited to CHE so that an obs with the same name in another
#  country can never be touched; only the m_foreignaffairs flag is changed
w = w |> mutate(
  m_foreignaffairs = case_when(
    country_isocode=='CHE' & name=='Pierre Aubert' & year==1980 ~ 1,
    country_isocode=='CHE' & name=='Willi Ritschard' & year==1980 ~ 0,
    country_isocode=='CHE' & year < 1980 & 
      position %in% c("Min. Political Dept" , "Min. , Political Dept.") ~ 1,
    # TRUE rather than T: the shorthand is an ordinary variable that can be reassigned
    TRUE ~ m_foreignaffairs
  )
)

# Serbia: obs up to 2006 whose portfolio_1 mentions 'Foreign'
w |>
  filter(country_isocode=='SRB', year <=2006, str_detect(portfolio_1, 'Foreign')) |>
  select(year, position, name, portfolio_1) |>
  print(n=Inf)

# Vuk Draskovic is only listed starting 2006
#  he was MFA of Serbia and Montenegro 2004-2006
#  but no Serbia MFA is listed prior to 2006
# leaving as is


##### WRITING OUT #####

# save the cleaned data
write_csv(w, 'process_data/whogov_clean.csv')

# remove every object in the workspace except replication_wd
rm(list=setdiff(ls(), 'replication_wd'))
